# === load relevant libraries
library(data.table)
library(ggplot2)
library(DescTools)

# === Figure 1(a). Morningstar fund rating by style
# Plots the cross-sectional average rating of each style category over time.
rm(list = ls())  # every figure section starts from an empty workspace

# Keep non-sector fund-months from 1991-01 onwards that carry a rating, then
# average the rating within each (month, style category) cell.
fund_panel <- readRDS('input_data/monthly_fund_data.RDS')
rated <- fund_panel[(yyyymm >= 199101) & (sectorFund == 0) & !is.na(rating)]
style_rating <- rated[, list(rating = mean(rating)), by = list(yyyymm, category)]

# yyyymm is a numeric year-month (e.g. 199101); 100 * yyyymm + 1 appends day 01
# so the value can be parsed as a calendar date for the x axis.
style_rating[, date := as.Date(as.character(100 * yyyymm + 1), format = '%Y%m%d')]

# One line per style category; the dotted vertical line marks 2002-06-01.
ggplot(style_rating, aes(x = date, y = rating, color = category)) +
  geom_line() +
  theme_classic() +
  geom_vline(xintercept = as.Date('2002-06-01'), lty = 3, col = 3, lwd = 1)

# === Figure 1(b).  Fund flow by Morningstar rating
# Plots the yearly average of month-demeaned fund flows, one line per value of
# the lagged rating (rating_1).
rm(list = ls())  # every figure section starts from an empty workspace

# Keep fund-months whose lagged assets (aum_1) are at least 10.
funds <- readRDS('input_data/monthly_fund_data.RDS')
funds <- funds[aum_1 >= 10, list(yyyymm, fundno, category, rating_1, flow, aum_1)]

# Demean flows within each month: compute the monthly cross-sectional mean and
# subtract it through an update join.
# NOTE(review): mean(flow) has no na.rm, so a single missing flow makes the
# whole month NA -- confirm the input has no missing flows.
month_avg <- funds[, list(avg_flow = mean(flow)), by = yyyymm]
funds[month_avg, on = 'yyyymm', flow := flow - i.avg_flow]
setkey(funds, yyyymm)  # stable sort by month before the yearly aggregation
rm(month_avg)

# Average the demeaned flow by (calendar year, lagged rating).
funds[, yyyy := floor(yyyymm / 100)]
by_rating <- funds[, list(flow = mean(flow)), by = list(yyyy, rating_1)]

# Legend label: '1 star', otherwise '<n> stars'. A missing rating keeps the
# singular suffix (it is labelled 'NA star').
by_rating[, lab := paste0(rating_1, ' star',
                          ifelse(!is.na(rating_1) & rating_1 != 1, 's', ''))]

# The dotted vertical line sits at mid-2002 on the yearly axis.
ggplot(by_rating, aes(x = yyyy, y = flow, color = lab)) +
  geom_line(lwd = 1) +
  geom_vline(xintercept = 2002.5, lty = 3, col = 2, lwd = 1) +
  theme_classic() +
  labs(x = NULL, y = 'Monthly fund flow') +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme(legend.position = 'top', legend.title = element_blank())

# === Figure 1(c) and (d). Style fund flows and returns
rm(list = ls())  # every figure section starts from an empty workspace

# style-level data: one row per (month, style category)
data <- readRDS('input_data/morningstar_style_data.RDS')
data <- data[, list(yyyymm, category, ret, flow, expSum)]

# In each month, rank styles by expSum into bin = 1 to 9 (lowest to highest).
# Rows are ordered by (month, expSum), so the running row number within a
# month is the rank.
sortData <- data[order(yyyymm, expSum), list(yyyymm, category, expSum)]

# The top-minus-bottom comparison later uses bin 9 and bin 1, so every month
# must contain exactly nine styles; fail early with a readable message.
styles_per_month <- sortData[, .N, by = yyyymm]
if (any(styles_per_month$N != 9L)) {
  stop('expected exactly 9 styles per month; offending yyyymm: ',
       paste(styles_per_month[N != 9L, yyyymm], collapse = ', '),
       call. = FALSE)
}
rm(styles_per_month)
sortData[, bin := seq_len(.N), by = yyyymm]

# Reshape the outcome variables (ret, flow) to long format:
# columns yyyymm, category, var, value.
data[, expSum := NULL]
data <- melt(data, id.vars = c('yyyymm', 'category'), variable.name = 'var')

# get subsequent 36 months of returns and flows
# Convert months into a consecutive integer index for ease of manipulation.
# The months are sorted first so that idx is chronological even if the rows of
# `data` are not.
month_index <- unique(data[, list(yyyymm)])
setorder(month_index, yyyymm)
month_index[, idx := seq_len(.N)]
data <- merge(data, month_index, by = 'yyyymm')

# For horizon i, relabel the observation of month idx as belonging to month
# idx - i. After mapping idx back to yyyymm below, each row holds the value
# realised `hor` months after month yyyymm.
shifted <- vector('list', 37L)
for (i in 0:36) {
  shifted[[i + 1L]] <- data[, list(idx = idx - i, hor = i, category, var, value)]
}
out <- rbindlist(shifted)
rm(shifted)

# Inner merge: shifted indices that fall before the first month have no match
# and are dropped.
out <- merge(out, month_index[, list(idx, yyyymm)], by = 'idx')
rm(month_index)

# Attach the bin each style had in month yyyymm (left join keeps all rows).
out <- merge(out, sortData[, list(yyyymm, category, bin)],
             by = c('yyyymm', 'category'), all.x = TRUE)
out[, idx := NULL]
rm(sortData)

# average values by bin and horizon, separately before and after June 2002
out[, period := ifelse(yyyymm > 200206, 'after 2002', 'before 2002')]
out <- out[, list(value = mean(value)), by = list(bin, hor, var, period)]

# get difference between top (bin 9) and bottom (bin 1) styles
out <- merge(out[bin == 9, list(hor, var, period, top = value)],
             out[bin == 1, list(hor, var, period, bottom = value)],
             by = c('hor', 'var', 'period'))
out <- out[, list(hor, var, period, value = top - bottom)]
out <- out[order(hor)]

# compute cumulative values: running sum over horizons within each
# (variable, period) series; rows are already ordered by hor
out[, cum_value := cumsum(value), by = list(var, period)]

# `i` is the leftover counter of the horizon loop earlier in this section
data <- copy(out); rm(out, i)

# Attach the pre-computed bootstrap standard errors, matched on
# (horizon, variable, period). This is an inner merge, so rows of `data`
# without a matching standard error are dropped.
se_table <- readRDS('input_data/morningstar_style_impulse_response_standard_error.RDS')
data <- merge(data, se_table, by = c('hor', 'var', 'period'))
rm(se_table)

# = Plot flow: Figure 1(c)
# Cumulative top-minus-bottom flow by horizon with a +/- 1.96 standard-error
# ribbon. `period` is mapped to fill only, so it separates the two series and
# colours the ribbons.
ggplot(data[var == 'flow'], aes(x = hor, y = cum_value, fill = period)) +
  geom_line() +
  geom_ribbon(aes(ymin = cum_value - 1.96 * se, ymax = cum_value + 1.96 * se),
              alpha = .2) +
  labs(x = 'Months', y = 'Cumulative flow: top minus bottom style') +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme_classic() +
  theme(legend.position = 'top', legend.title = element_blank()) +
  coord_cartesian(ylim = c(-.1, .3))  # zoom the y axis without dropping data

# = Plot return: Figure 1(d)
# Cumulative top-minus-bottom return by horizon with a +/- 1.96 standard-error
# ribbon. `period` is mapped to fill only, so it separates the two series and
# colours the ribbons.
ggplot(data[var == 'ret'], aes(x = hor, y = cum_value, fill = period)) +
  geom_line() +
  geom_ribbon(aes(ymin = cum_value - 1.96 * se, ymax = cum_value + 1.96 * se),
              alpha = .2) +
  labs(x = 'Months', y = 'Cumulative return: top minus bottom style') +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme_classic() +
  theme(legend.position = 'top', legend.title = element_blank()) +
  coord_cartesian(ylim = c(-.05, .2))  # zoom the y axis without dropping data

